********************************************************************************
** Stata code for nettwork type and depression research paper
** Author: David Su
** July 4, 21
** Based on cov_netps_nettype_0612
********************************************************************************

******************* Part A: simple recoding ***************************
// generate a file with obs that participate in two waves. 

cd "/Users/davidsu/Documents/Cov-netps E/wave2/cov_netps_nettype_0612/submission/Network Sciences/R code and data/stata_simplified_2025/"
capture mkdir "output"
capture mkdir "figure"
log using "log_file_nettype", replace
log on
use "cov_netps_nettype_0612.dta",clear
sort egoid wave


// first clean up some sociodemographic control vars
** gender1 has been recoded to gender
** recode single1 to single 
drop single
recode single1 (2/3 = 0 "notsingle") (1=1 "single"), gen(single)

** recode quarantine1 to quarantine
drop quarantine
drop quarantine2
recode quarantine1 (1 = 0 "notquarantined") (2=1 "quarantined"), gen(quarantine) 

** construct the health scale
** ces_d depression 
drop ces_d 
gen happy_rev = 7 - happy
gen live_happy_rev = 7 - live_happy 
egen ces_d = rowtotal(sad frustrated depressed hardTOdo sleep happy_rev concentrate live_happy_rev bothering)


** //Replication may need to adjust this based on clustering outcomes. CLusters with more than 20% prediction errors should be dropped.  
** recode cluster_ID, cluster 6 is the unstable cluster (error > 20%) that should be labelled as othernetworks
recode cluster_ID (6= .) 

** keep only obs with both waves
quietly by egoid:  gen dup = cond(_N==1,0,_n)
recode dup (0 = 0 "wave1only") (1/2 = 1 "bothwaves"), gen(bothwave)
gen cluster_ID1 = cluster_ID if wave ==1

** make edu in a consistent scale
recode edu (5=1) (6 =1) (7 =2)

**lag health
sort egoid wave
by egoid: gen ces_d_1 = ces_d[_n-1]
save "output/covnetps_nettype_cleaned.dta",replace

******************************** Part A ends ***********************************


******************* Part B: further data cleaning ******************************
// seperately create two dataet for the wave 1 and wave 2 data
**rerun the above program Part A and then,
use "output/covnetps_nettype_cleaned.dta",clear
keep if wave ==1
save "output/covnetps_nettype_cleaned_w1.dta",replace

use "output/covnetps_nettype_cleaned.dta",clear
keep if wave ==2
save "output/covnetps_nettype_cleaned_w2.dta",replace

** rerun the above program Part A and then,reshape to wide format and compute the change variable
use "output/covnetps_nettype_cleaned.dta",clear
keep egoid wave cluster_ID
reshape wide cluster_ID, i(egoid) j(wave)

gen change_type  = 1
replace change_type  = 0 if cluster_ID1 == cluster_ID2
save "output/changetype.dta", replace
** this file is used to contain the network type and type change information 

** rerun Program A, then
use "output/covnetps_nettype_cleaned.dta",clear

tab bothwave
**there are 1048/2=524 obs participated in both waves.

** combine the changetype info to the current working dataset
merge m:1 egoid using "output/changetype.dta", keepusing(cluster_ID1 cluster_ID2 change_type)

* it seems wave 2 does not have measure of age, collegetype, because they are identical to the first wave
drop age collegetype cluster_ID1 weight_adjusted
drop _merge
merge m:1 egoid using "output/covnetps_nettype_cleaned_w1.dta", keepusing(age collegetype cluster_ID1 weight_adjusted)

** compute the pandemic_related info. in the current dataset 
gen log_confirmed = log(pro_confirmed)
bysort egoid: gen log_confirmed_1 = log_confirmed[_n-1]



******************************** Part B ends ***********************************


*************************** Part C Data analysis ******************************
//prepare for panel data analysis
** //Replication may need to adjust this based on clustering outcomes. 
lab define NETTYPEID 1 "Family" 2 "Friend" 3 "Restricted" 4 "Family&Community" 5 "School&Career" 7 "Homebody" 8 "JustActivity"
lab val cluster_ID NETTYPEID
lab var cluster_ID Network_Type


*** To examine whether individual shifted to a "better" network or a "worse" one,
*** we rank each cluster_ID based on their embedded social support (objective and subjective)
gen exchange_n = kin_exchange_n + nonkin_exchange_n
gen personal_n = kin_personal_n + nonkin_personal_n
gen help_n = kin_help_n + nonkin_help_n
gen entertainment_n = kin_entertainment_n + nonkin_entertainment_n
factor MeetFrd MeetHangout MeetHelp exchange_n personal_n help_n entertainment_n
screeplot
** factor analysis shows that these item reflects a two dimensional sturcture 
** of social support measures, which could be labelled objective and subjective ones

** confirmative factor analysis shows that our two-factor social support model fits well with the dataset
sem (Subjective_support -> MeetFrd, ) (Subjective_support -> MeetHangout, ) (Subjective_support -> MeetHelp, ) (Objective_support -> exchange_n, ) (Objective_support -> personal_n, ) (Objective_support -> help_n, ) (Objective_support -> entertainment_n, ), covstruct(_lexogenous, diagonal) latent(Subjective_support Objective_support ) cov( Subjective_support*Objective_support) nocapslatent
estat gof,stats(all)
** CFA result shows that two-factor model fits quite well, RMSWA = 0.037, CFI >0.990, TLI = 0.983, AIC =  43728.267 , BIC = 43857.012,  chi2_ms(13)= 59.196, p <0.001 
** now estimate a one-factor social support model instead, for comparison
sem (Social_support -> MeetFrd, ) (Social_support -> MeetHangout, ) (Social_support -> MeetHelp, ) (Social_support -> exchange_n, ) (Social_support -> personal_n, ) (Social_support -> help_n, ) (Social_support -> entertainment_n, ), latent(Social_support ) nocapslatent
estat gof,stats(all)
** CFA result shows that a one-factor model does not make sense in the data.

**** save data**** 
save, replace
******************
** map network type against support using only wave 1 data
keep if wave ==1

** rank network types according to net_satisfaction scale, E3 (subjective support)
** (net_satisfaction score = MeetFrd + MeetHangout + MeetHelp)
** for each network type, compute the average standardized network satisfaction score

quietly summarize net_satisfaction
gen net_sat_sd  = (net_satis - r(mean))/r(sd)
reg net_sat_sd i.cluster_ID
predict  net_sat_predicted
lab var net_sat_predicted net_sat_predicted

** rank network types according to social support (objective support)
** for each network type, compute the average standardized objective support score

recode exchange_n (0 = 0 "inadequate") (1 = 1 "marginal") (3/6 = 2 "adequate"), gen(exchange_scale)
recode personal_n (0 = 0 "inadequate") (1 = 1 "marginal") (3/6 = 2 "adequate"), gen(personal_scale)
recode help_n (0 = 0 "inadequate") (1 = 1 "marginal") (3/6 = 2 "adequate"), gen(help_scale)
recode entertainment_n (0 = 0 "inadequate") (1 = 1 "marginal") (3/6 = 2 "adequate"), gen(entertainment_scale)
egen support_n = rowtotal(exchange_n personal_n help_n entertainment_n)
egen support_scale = rowtotal(exchange_scale personal_scale help_scale entertainment_scale)

quietly summarize support_n
gen support_sd  = (support_n - r(mean))/r(sd)
reg support_sd i.cluster_ID
predict support_n_predicted
lab var support_n_predicted support_n_predicted

** visualize each network type's average score on subjective and objective support
scatter net_sat_predicted support_n_pred , mlabel(cluster_ID) yline(0) xline(0)  mlabposition(3) xlab(-1(0.2)1) xtitle("(Low)             Actual social support            (High)") ytitle("(Low)         Perceived social support         (High)")


twoway (scatter net_sat_predicted support_n_pred if net_sat_predicted>0 & support_n_pred>0, mfc(red) mlabel(cluster_ID) mlabpos(3)) (scatter net_sat_predicted support_n_pred if net_sat_predicted<0 & support_n_pred<0, mfc(black) mlabel(cluster_ID) mlabpos(3)) (scatter net_sat_predicted support_n_pred if net_sat_predicted*support_n_pred<0, mfc(blue) mlabel(cluster_ID) mlabpos(3)) , xline(0) yline(0) xlab(-1(0.2)1) xtitle("(Low)             Actual social support            (High)") ytitle("(Low)         Perceived social support         (High)") legend(order(1 "High received & perceived support" 2 "Low received & perceived support" 3 "Mixed: one high, one low"))
graph save "figure/support_scatter_quadrants.gph", replace
graph export "figure/support_scatter_quadrants.pdf", replace

** result shows that type 1,2, and 4 have both above-average objective and subjective support, and should be ranked the highest;
** network type 5 has below-average subjective support, but high objective support;
** network type 8 and 7 has below-average objective support, but high level of subjective support;
** thus, type 6 ,8, and 7 should be ranked as having medium level of support; 
** network type 3 has both below-average objective and subjective support, and should be ranked the lowest in social support. 

******************* 
** put back both wave data
*******************
use "output/covnetps_nettype_cleaned.dta",clear


quietly summarize net_satisfaction
gen net_sat_sd  = (net_satis - r(mean))/r(sd)
reg net_sat_sd i.cluster_ID
predict  net_sat_predicted
lab var net_sat_predicted net_sat_predicted
recode exchange_n (0 = 0 "inadequate") (1 = 1 "marginal") (3/6 = 2 "adequate"), gen(exchange_scale)
recode personal_n (0 = 0 "inadequate") (1 = 1 "marginal") (3/6 = 2 "adequate"), gen(personal_scale)
recode help_n (0 = 0 "inadequate") (1 = 1 "marginal") (3/6 = 2 "adequate"), gen(help_scale)
recode entertainment_n (0 = 0 "inadequate") (1 = 1 "marginal") (3/6 = 2 "adequate"), gen(entertainment_scale)
egen support_n = rowtotal(exchange_n personal_n help_n entertainment_n)
egen support_scale = rowtotal(exchange_scale personal_scale help_scale entertainment_scale)

quietly summarize support_n
gen support_sd  = (support_n - r(mean))/r(sd)
reg support_sd i.cluster_ID
predict support_n_predicted
lab var support_n_predicted support_n_predicted


recode cluster_ID (3 = 1 "lowestsupport") (5 = 2) (7 =2) (8 = 2) (1 = 3) (2 = 3) (4 = 3 "highesupport"), gen(rank_twodimension)
by egoid: gen rank_twodimension_1 = rank_twodimension[_n-1]

*** examine the change of network type among two waves
tab  rank_twodimension rank_twodimension_1 if wave ==2 , col chi2
*** type unchanged: (12+51+170)/438 = 53.196%, (14+22 + 45)/438 = 18.493% have an increased rank, (6+22 + 46)/438 = 16.894 have move downwards
*** if we deemed medium and high support networks are acceptable, but lowest support network as "getting worse", then only 5.797% end up with a worse network 

// panel data analysis
** some indiviudals have not been predicted accurately for their network types due to the settings of previous clustering algorithm in R
count if cluster_ID!=. & wave ==1
count if  cluster_ID!=. & cluster_ID2!=. & wave ==1
drop accurate 
gen accurate = 0 
replace accurate =1 if cluster_ID!=. & cluster_ID2!=. & wave ==1
bysort egoid: egen totalacc = sum(accurate)
tab totalacc acc
drop accurate 
rename totalacc accurate
************************** accurate variable corrected on Dec 19 2021

tab accurate 
** among 524 indiviuals, 876/2=438 of them have complete network type info. in both waves, whom can be used for subsequent analysis


//analysis

gen log_confirm = log(pro_confirmed)


************** import social distancing, age & collegetype variable **************
drop _merge
merge 1:1 egoid wave using "oxford_social_distancing_data.dta"
drop _merge
merge m:1 egoid using "output/covnetps_nettype_cleaned_w1.dta", keepusing(age collegetype)
drop _merge

***************** calculate metrics of openness (social distancing) **************

drop C1_Flag C2_Flag C3_Flag C4_Flag C5_Flag C6_Flag C7_Flag
destring C1_School_closing C2_Workplace_closing C3_Cancel_public_events C4_Restrictions_on_gatherings C5_Close_public_transport C6_Stay_at_home_requirements C7_Restrictions_internal,replace
factor C1_School_closing C2_Workplace_closing C3_Cancel_public_events C4_Restrictions_on_gatherings C5_Close_public_transport C6_Stay_at_home_requirements C7_Restrictions_internal
predict openness 

bysort egoid: gen open_1 = open[_n-1]
gen deltaopen = openness - open_1
bysort province: egen ave_deltaopen = mean(deltaopen)
graph hbar ave_deltaopen, over(province)

// properly label the variable
label var ces_d "Depression (CES-D)"
label var sss "SES"
label var rank_twodimension "Rank of Network Type"
label var single "Single"
label var log_confirm "Confirmed cases (log)"
label var quarantine "Know someone tested positive"
label var openness "Strength of social distancing policy"
lab define NETTYPEID 1 "Family" 2 "Friend" 3 "Restricted" 4 "Family&Community" 5 "School&Career" 7 "Homebody" 8 "JustActivity"
lab val cluster_ID NETTYPEID
lab var cluster_ID Network_Type
label define WAVELAB 1 "First wave" 2 "Second wave", replace
label values wave WAVELAB
label define EDULAB 1 "Undergraduate" 2 "Graduate or above" 3 "Gap year" 4 "Already got a job", replace
label values edu EDULAB


**************** summary stats  ***********************************************

*************** Table 1 ********************************************************


asdoc sum age i.gender single sss i.edu collegetype log_confirm quarantine support_n net_satisfaction ces_d if wave ==1 , dec(3)
asdoc summ age i.gender single sss i.edu collegetype log_confirm quarantine support_n net_satisfaction ces_d  if wave ==2 , dec(3)


******* Table 2: Descriptives by 7 network types (wave 1) **********************

* sample restriction like your note
keep if wave==1


* 1) Sample size by network type
asdoc tab cluster_ID, replace save(output/Table2_simple.doc)

* 2) CATEGORICAL (% within each network type) + Chi-square
asdoc tab gender    cluster_ID, col chi2 append save(output/Table2_simple.doc)
asdoc tab single    cluster_ID, col chi2 append save(output/Table2_simple.doc)
asdoc tab edu       cluster_ID, col chi2 append save(output/Table2_simple.doc)

* 3) CONTINUOUS: Mean (SD) by network type
asdoc tabstat age sss ces_d, by(cluster_ID) stat(mean sd) append save(output/Table2_simple.doc)

* 4) CONTINUOUS tests (F)
asdoc oneway age       cluster_ID, append save(output/Table2_simple.doc)
asdoc oneway sss       cluster_ID, append save(output/Table2_simple.doc)
asdoc oneway ces_d     cluster_ID, append save(output/Table2_simple.doc)
	
* 5) whole sample total sample
asdoc tab gender if cluster_ID!=., append save(output/Table2_simple.doc)
asdoc tab single if cluster_ID!=., append save(output/Table2_simple.doc)
asdoc tab edu if cluster_ID!=., append save(output/Table2_simple.doc)
asdoc tabstat age sss ces_d if cluster_ID!=., stat(mean sd) append save(output/Table2_simple.doc)



************** mlogit predicting the determinent of network type **************
mlogit cluster_ID openness age gender sss i.single i.edu log_confirm i.quarantine i.wave, baseoutcome(3)
outreg2 using "output/mainresults_mlogit.rtf", replace tdec(2) bdec(2) alpha(0.001, 0.01, 0.05)
margins, at(openness=(-2.5(0.5)2.5)) 
marginsplot, xtitle("Strength of local social distancing policies") ytitle("Predicted probability") xlabel(-2.5(0.5)2.5, format(%3.1f)) legend(order(1 "Family" 2 "Friend" 3 "Restricted" 4 "Family&Community" 5 "School&Career" 6 "JustActivity" 7 "Homebody") cols(1))
graph export "figure/mlogit_predict_nettype.pdf", replace



****************************** IPW-FE: Main result******************************

xtset egoid wave
by egoid: egen maxwave = max(wave)
gen R2 = (maxwave >= 2)
drop maxwave


by egoid (wave): gen ces_w1     = ces_d[1]
by egoid (wave): gen sss_w1     = sss[1]
by egoid (wave): gen clus_w1    = cluster_ID[1]
by egoid (wave): gen single_w1  = single[1]
by egoid (wave): gen edu_w1     = edu[1]
by egoid (wave): gen logconf_w1 = log_confirm[1]
by egoid (wave): gen quar_w1    = quarantine[1]
by egoid (wave): gen open_w1    = openness[1]

preserve
keep if wave == 1

logit R2 c.ces_w1 i.clus_w1 c.sss_w1 i.single_w1 i.edu_w1 c.logconf_w1 i.quar_w1 c.open_w1 if !missing(R2)

predict phat if e(sample), pr
summ R2 if e(sample)
scalar pbar = r(mean)

gen sw = .
replace sw = pbar/phat if R2 == 1 & e(sample)

keep egoid sw phat R2
tempfile wts
save `wts', replace
restore

merge m:1 egoid using `wts', nogen


************************ Weighted FE (xtreg) using IPW weights******************


cap erase "output/mainresults_fe_interact.rtf"

* Main FE model (your baseline)
xtset egoid wave

xtreg ces_d ib3.cluster_ID c.sss i.single i.edu i.wave c.log_confirm i.quarantine c.openness [pweight=sw], fe 
eststo m1

* Interaction FE model (your headline)
xtreg ces_d ib3.cluster_ID##c.sss i.single i.edu i.wave c.log_confirm i.quarantine c.openness [pweight=sw], fe 
eststo m2

margins cluster_ID, at(sss= (1(1)10) )
marginsplot, noci title("Predicted Margins, Fixed-effect") ytitle("predicted depression score") xtitle("SES")
graph export "figure/marginsplot_interaction_effect.pdf", replace


xtreg ces_d c.rank_twodimension c.sss i.single i.edu i.wave c.log_confirm i.quarantine c.openness  [pweight=sw], fe 
eststo m3

xtreg ces_d c.rank_twodimension##c.sss i.single i.edu i.wave c.log_confirm i.quarantine c.openness  [pweight=sw], fe 
eststo m4

esttab m1 m2 m3 m4 using "output/mainresults_fe_interact.rtf", replace rtf star(+ 0.10 * 0.05 ** 0.01 *** 0.001) se label
save "output/complementary_analysis.dta", replace


**************************************** Part C  Ends **************************



******************* Part D: complimentary analysis &  attirtion ****************

**************** complimentary analysis **************
use "output/complementary_analysis.dta", clear

cap erase "output/complimentary_fe1"
xtreg ces_d ib3.cluster_ID##c.sss i.single i.edu i.wave log_confirm i.quarantine HS if accurate ==1, fe
est sto c1

** for robustness checks, replicate the above analysis for all 524 indiviuals (unbalanced panel),
** even if some of them are classified in inaccurate clusters in either waves, i.e., have missing values in cluster_ID
xtreg ces_d ib3.cluster_ID##c.sss i.single i.edu i.wave log_confirm i.quarantine if bothwave ==1, fe
est sto c2

xtreg ces_d c.rank_twodimension##c.sss i.single i.edu  i.wave log_confirm i.quarantine if bothwave ==1 , fe
est sto c3

esttab c1 c2 c3 using "output/complimentary_fe1.rtf", replace rtf star(+ 0.10 * 0.05 ** 0.01 *** 0.001) se label

** hausman test confirming the use of fixed effect models
xtreg ces_d ib3.cluster_ID  sss i.single i.edu i.wave log_confirm i.quarantine if accurate ==1, fe
est sto fe
xtreg ces_d ib3.cluster_ID  sss i.single i.edu i.wave log_confirm i.quarantine if accurate ==1, re
est sto re 
hausman fe re

** these checks demonstrates that our results are robust


*******************************************************
* Compare attritors vs complete cases
*******************************************************

********* numeric ************* 

xtset egoid wave
by egoid: egen maxwave = max(wave)
gen R2 = (maxwave >= 2)
drop maxwave

tempfile ttest_results
tempname H

postfile `H' str20 var double g0_mean g1_mean diff se t p N0 N1 using `ttest_results', replace

foreach v in ces_d sss log_confirm openness {
    quietly ttest `v', by(bothwave)
    post `H' ("`v'") (r(mu_1)) (r(mu_2)) (r(mu_2)-r(mu_1)) (r(se)) (r(t)) (r(p)) (r(N_1)) (r(N_2))
}

postclose `H'
use `ttest_results', clear

label var g0_mean "Mean (bothwave=0)"
label var g1_mean "Mean (bothwave=1)"
label var diff "Diff (1-0)"
label var se "Std. Err."
label var t "t"
label var p "p-value"
label var N0 "N (bothwave=0)"
label var N1 "N (bothwave=1)"

format g0_mean g1_mean diff se %9.3f
format t %9.3f
format p %9.4f

export excel using "output/ttest_by_bothwave_wave1.xlsx", firstrow(variables) replace

****************** categorical ******************
use "output/complementary_analysis.dta", clear
asdoc tab single bothwave if wave==1, chi2 row replace save(output/attrition_tabs.doc)
asdoc tab edu bothwave if wave==1, chi2 row append save(output/attrition_tabs.doc)
asdoc tab cluster_ID bothwave if wave==1, chi2 row append save(output/attrition_tabs.doc)
asdoc tab quarantine bothwave if wave==1, chi2 row append save(output/attrition_tabs.doc)

logit bothwave ces_d sss ib3.cluster_ID i.single i.edu c.log_confirm i.quarantine c.openness if wave==1
est sto sa1
outreg2 using "output/ttest_by_bothwave_wave1.xlsx",  append tdec(2) bdec(2) alpha(0.001, 0.01, 0.05)



log off
log close
